package com.example.read

import org.apache.spark
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.spark.sql.{Row, SQLContext, SparkSession}
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import java.io.StringReader
import au.com.bytecode.opencsv.CSVReader
//extends Serializable
object read_csv {
	/**
	 * Reads a headerless 5-column CSV into a Spark DataFrame with an explicit
	 * all-string schema and prints it.
	 *
	 * @param args optional: args(0) overrides the input CSV path; when absent,
	 *             the original hard-coded path is used (backward compatible).
	 */
	def main(args: Array[String]): Unit = {
		// Allow the input path to be supplied on the command line; fall back to
		// the original hard-coded location so existing invocations still work.
		val inputPath =
			if (args.nonEmpty) args(0)
			else "/Users/zero/Desktop/交通时空大数据分析挖掘系统-数据/服创大赛-出行方式静态数据.csv"
		// One StructField per column (name + data type). All columns are kept as
		// raw strings; no type coercion is done while reading.
		val structFields = List(
			StructField("longitude", StringType),
			StructField("latitude", StringType),
			StructField("mode", StringType),
			StructField("mode_name", StringType),
			StructField("mode_num", StringType))
		// The table schema is built from the ordered list of fields.
		val types = StructType(structFields)
		val sparkConf = new SparkConf().setAppName("RDDToDataFrame").setMaster("local")
		val sparkContext = new SparkContext(sparkConf)
		try {
			val sqlContext = new SQLContext(sparkContext)
			val rdd = sparkContext.textFile(inputPath)
			// Split each line ONCE (the original re-split the same line five
			// times). limit = -1 keeps trailing empty fields, and padTo guards
			// short rows, so malformed lines no longer throw
			// ArrayIndexOutOfBoundsException. Row field types must match the
			// schema's StructFields or createDataFrame fails at runtime.
			val rowRdd = rdd.map { line =>
				val fields = line.trim.split(",", -1).padTo(5, "")
				Row(fields(0), fields(1), fields(2), fields(3), fields(4))
			}
			// Build the DataFrame from the row RDD plus the explicit schema.
			val df = sqlContext.createDataFrame(rowRdd, types)
			println("hello")
			println(df)
			df.show()
		} finally {
			// Always release the local Spark context, even if an action fails.
			sparkContext.stop()
		}
	}
}
